library(ezids)
## Warning in !is.null(rmarkdown::metadata$output) && rmarkdown::metadata$output
## %in% : 'length(x) = 2 > 1' in coercion to 'logical(1)'
# Load the raw telecom churn data. read.csv() already returns a data.frame,
# so no extra data.frame() wrapper is needed.
Telecom_Data <- read.csv("Telecom Data.csv")
# Inspect the column names and the types inferred from the CSV.
str(Telecom_Data)
## 'data.frame': 51047 obs. of 58 variables:
## $ CustomerID : int 3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
## $ Churn : chr "Yes" "Yes" "No" "No" ...
## $ MonthlyRevenue : num 24 17 38 82.3 17.1 ...
## $ MonthlyMinutes : int 219 10 8 1312 0 682 26 98 24 1056 ...
## $ TotalRecurringCharge : int 22 17 38 75 17 52 30 66 35 75 ...
## $ DirectorAssistedCalls : num 0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
## $ OverageMinutes : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RoamingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PercChangeMinutes : int -157 -4 -2 157 0 148 60 24 20 43 ...
## $ PercChangeRevenues : num -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
## $ DroppedCalls : num 0.7 0.3 0 52 0 9 0 0 0 0 ...
## $ BlockedCalls : num 0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
## $ UnansweredCalls : num 6.3 2.7 0 76 0 13 2.3 4 1 0 ...
## $ CustomerCareCalls : num 0 0 0 4.3 0 0.7 0 4 0 0 ...
## $ ThreewayCalls : num 0 0 0 1.3 0 0 0 0 0 0 ...
## $ ReceivedCalls : num 97.2 0 0.4 200.3 0 ...
## $ OutboundCalls : num 0 0 0.3 370.3 0 ...
## $ InboundCalls : num 0 0 0 147 0 0 0 0 1.7 0 ...
## $ PeakCallsInOut : num 58 5 1.3 555.7 0 ...
## $ OffPeakCallsInOut : num 24 1 3.7 303.7 0 ...
## $ DroppedBlockedCalls : num 1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0.3 0 0 22.7 0 0.7 0 0 0 0 ...
## $ MonthsInService : int 61 58 60 59 53 53 57 59 53 55 ...
## $ UniqueSubs : int 2 1 1 2 2 1 2 2 3 1 ...
## $ ActiveSubs : int 1 1 1 2 2 1 2 2 3 1 ...
## $ ServiceArea : chr "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
## $ Handsets : int 2 2 1 9 4 3 2 3 4 9 ...
## $ HandsetModels : int 2 1 1 4 3 2 2 3 3 5 ...
## $ CurrentEquipmentDays : int 361 1504 1812 458 852 231 601 464 544 388 ...
## $ AgeHH1 : int 62 40 26 30 46 28 52 46 36 46 ...
## $ AgeHH2 : int 0 42 26 0 54 0 58 46 34 68 ...
## $ ChildrenInHH : chr "No" "Yes" "Yes" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "Yes" "No" "No" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "No" ...
## $ RVOwner : chr "No" "No" "No" "No" ...
## $ Homeownership : chr "Known" "Known" "Unknown" "Known" ...
## $ BuysViaMailOrder : chr "Yes" "Yes" "No" "Yes" ...
## $ RespondsToMailOffers : chr "Yes" "Yes" "No" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "No" "No" ...
## $ OwnsComputer : chr "Yes" "Yes" "No" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 1 0 0 0 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "No" "Yes" "Yes" "Yes" ...
## $ NotNewCellphoneUser : chr "No" "No" "No" "No" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 4 5 6 6 9 1 9 6 9 5 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 0 0 1 1 1 0 0 1 ...
## $ HandsetPrice : chr "30" "30" "Unknown" "10" ...
## $ MadeCallToRetentionTeam : chr "Yes" "No" "No" "No" ...
## $ CreditRating : chr "1-Highest" "4-Medium" "3-Good" "4-Medium" ...
## $ PrizmCode : chr "Suburban" "Suburban" "Town" "Other" ...
## $ Occupation : chr "Professional" "Professional" "Crafts" "Other" ...
## $ MaritalStatus : chr "No" "Yes" "Yes" "No" ...
# Treat the churn flag and the occupation label as categorical variables.
Telecom_Data[c("Churn", "Occupation")] <- lapply(
  Telecom_Data[c("Churn", "Occupation")],
  as.factor
)
# Re-inspect the structure to confirm the two columns are now factors.
str(Telecom_Data)
## 'data.frame': 51047 obs. of 58 variables:
## $ CustomerID : int 3000002 3000010 3000014 3000022 3000026 3000030 3000038 3000042 3000046 3000050 ...
## $ Churn : Factor w/ 2 levels "No","Yes": 2 2 1 1 2 1 1 1 1 1 ...
## $ MonthlyRevenue : num 24 17 38 82.3 17.1 ...
## $ MonthlyMinutes : int 219 10 8 1312 0 682 26 98 24 1056 ...
## $ TotalRecurringCharge : int 22 17 38 75 17 52 30 66 35 75 ...
## $ DirectorAssistedCalls : num 0.25 0 0 1.24 0 0.25 0.25 2.48 0 0 ...
## $ OverageMinutes : int 0 0 0 0 0 0 0 0 0 0 ...
## $ RoamingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PercChangeMinutes : int -157 -4 -2 157 0 148 60 24 20 43 ...
## $ PercChangeRevenues : num -19 0 0 8.1 -0.2 -3.1 4 6.8 -0.3 2.4 ...
## $ DroppedCalls : num 0.7 0.3 0 52 0 9 0 0 0 0 ...
## $ BlockedCalls : num 0.7 0 0 7.7 0 1.7 1 0.3 0 0 ...
## $ UnansweredCalls : num 6.3 2.7 0 76 0 13 2.3 4 1 0 ...
## $ CustomerCareCalls : num 0 0 0 4.3 0 0.7 0 4 0 0 ...
## $ ThreewayCalls : num 0 0 0 1.3 0 0 0 0 0 0 ...
## $ ReceivedCalls : num 97.2 0 0.4 200.3 0 ...
## $ OutboundCalls : num 0 0 0.3 370.3 0 ...
## $ InboundCalls : num 0 0 0 147 0 0 0 0 1.7 0 ...
## $ PeakCallsInOut : num 58 5 1.3 555.7 0 ...
## $ OffPeakCallsInOut : num 24 1 3.7 303.7 0 ...
## $ DroppedBlockedCalls : num 1.3 0.3 0 59.7 0 10.7 1 0.3 0 0 ...
## $ CallForwardingCalls : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CallWaitingCalls : num 0.3 0 0 22.7 0 0.7 0 0 0 0 ...
## $ MonthsInService : int 61 58 60 59 53 53 57 59 53 55 ...
## $ UniqueSubs : int 2 1 1 2 2 1 2 2 3 1 ...
## $ ActiveSubs : int 1 1 1 2 2 1 2 2 3 1 ...
## $ ServiceArea : chr "SEAPOR503" "PITHOM412" "MILMIL414" "PITHOM412" ...
## $ Handsets : int 2 2 1 9 4 3 2 3 4 9 ...
## $ HandsetModels : int 2 1 1 4 3 2 2 3 3 5 ...
## $ CurrentEquipmentDays : int 361 1504 1812 458 852 231 601 464 544 388 ...
## $ AgeHH1 : int 62 40 26 30 46 28 52 46 36 46 ...
## $ AgeHH2 : int 0 42 26 0 54 0 58 46 34 68 ...
## $ ChildrenInHH : chr "No" "Yes" "Yes" "No" ...
## $ HandsetRefurbished : chr "No" "No" "No" "No" ...
## $ HandsetWebCapable : chr "Yes" "No" "No" "Yes" ...
## $ TruckOwner : chr "No" "No" "No" "No" ...
## $ RVOwner : chr "No" "No" "No" "No" ...
## $ Homeownership : chr "Known" "Known" "Unknown" "Known" ...
## $ BuysViaMailOrder : chr "Yes" "Yes" "No" "Yes" ...
## $ RespondsToMailOffers : chr "Yes" "Yes" "No" "Yes" ...
## $ OptOutMailings : chr "No" "No" "No" "No" ...
## $ NonUSTravel : chr "No" "No" "No" "No" ...
## $ OwnsComputer : chr "Yes" "Yes" "No" "No" ...
## $ HasCreditCard : chr "Yes" "Yes" "Yes" "Yes" ...
## $ RetentionCalls : int 1 0 0 0 0 0 0 0 0 0 ...
## $ RetentionOffersAccepted : int 0 0 0 0 0 0 0 0 0 0 ...
## $ NewCellphoneUser : chr "No" "Yes" "Yes" "Yes" ...
## $ NotNewCellphoneUser : chr "No" "No" "No" "No" ...
## $ ReferralsMadeBySubscriber: int 0 0 0 0 0 0 0 0 0 0 ...
## $ IncomeGroup : int 4 5 6 6 9 1 9 6 9 5 ...
## $ OwnsMotorcycle : chr "No" "No" "No" "No" ...
## $ AdjustmentsToCreditRating: int 0 0 0 0 1 1 1 0 0 1 ...
## $ HandsetPrice : chr "30" "30" "Unknown" "10" ...
## $ MadeCallToRetentionTeam : chr "Yes" "No" "No" "No" ...
## $ CreditRating : chr "1-Highest" "4-Medium" "3-Good" "4-Medium" ...
## $ PrizmCode : chr "Suburban" "Suburban" "Town" "Other" ...
## $ Occupation : Factor w/ 8 levels "Clerical","Crafts",..: 5 5 2 4 5 4 7 5 4 5 ...
## $ MaritalStatus : chr "No" "Yes" "Yes" "No" ...
# Per-column overview: quartiles and NA counts for numeric columns, level
# counts for factors, and length/class/mode for character columns.
summary(Telecom_Data)
## CustomerID Churn MonthlyRevenue MonthlyMinutes
## Min. :3000002 No :36336 Min. : -6.17 Min. : 0.0
## 1st Qu.:3100632 Yes:14711 1st Qu.: 33.61 1st Qu.: 158.0
## Median :3201534 Median : 48.46 Median : 366.0
## Mean :3201957 Mean : 58.83 Mean : 525.7
## 3rd Qu.:3305376 3rd Qu.: 71.06 3rd Qu.: 723.0
## Max. :3399994 Max. :1223.38 Max. :7359.0
## NA's :156 NA's :156
## TotalRecurringCharge DirectorAssistedCalls OverageMinutes
## Min. :-11.00 Min. : 0.0000 Min. : 0.00
## 1st Qu.: 30.00 1st Qu.: 0.0000 1st Qu.: 0.00
## Median : 45.00 Median : 0.2500 Median : 3.00
## Mean : 46.83 Mean : 0.8952 Mean : 40.03
## 3rd Qu.: 60.00 3rd Qu.: 0.9900 3rd Qu.: 41.00
## Max. :400.00 Max. :159.3900 Max. :4321.00
## NA's :156 NA's :156 NA's :156
## RoamingCalls PercChangeMinutes PercChangeRevenues DroppedCalls
## Min. : 0.000 Min. :-3875.00 Min. :-1107.700 Min. : 0.000
## 1st Qu.: 0.000 1st Qu.: -83.00 1st Qu.: -7.100 1st Qu.: 0.700
## Median : 0.000 Median : -5.00 Median : -0.300 Median : 3.000
## Mean : 1.236 Mean : -11.55 Mean : -1.192 Mean : 6.011
## 3rd Qu.: 0.300 3rd Qu.: 66.00 3rd Qu.: 1.600 3rd Qu.: 7.700
## Max. :1112.400 Max. : 5192.00 Max. : 2483.500 Max. :221.700
## NA's :156 NA's :367 NA's :367
## BlockedCalls UnansweredCalls CustomerCareCalls ThreewayCalls
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.0000
## 1st Qu.: 0.000 1st Qu.: 5.30 1st Qu.: 0.000 1st Qu.: 0.0000
## Median : 1.000 Median : 16.30 Median : 0.000 Median : 0.0000
## Mean : 4.086 Mean : 28.29 Mean : 1.869 Mean : 0.2988
## 3rd Qu.: 3.700 3rd Qu.: 36.30 3rd Qu.: 1.700 3rd Qu.: 0.3000
## Max. :384.300 Max. :848.70 Max. :327.300 Max. :66.0000
##
## ReceivedCalls OutboundCalls InboundCalls PeakCallsInOut
## Min. : 0.0 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 8.3 1st Qu.: 3.30 1st Qu.: 0.000 1st Qu.: 23.00
## Median : 52.8 Median : 13.70 Median : 2.000 Median : 62.00
## Mean : 114.8 Mean : 25.38 Mean : 8.178 Mean : 90.55
## 3rd Qu.: 153.5 3rd Qu.: 34.00 3rd Qu.: 9.300 3rd Qu.: 121.30
## Max. :2692.4 Max. :644.30 Max. :519.300 Max. :2090.70
##
## OffPeakCallsInOut DroppedBlockedCalls CallForwardingCalls CallWaitingCalls
## Min. : 0.00 Min. : 0.00 Min. : 0.00000 Min. : 0.000
## 1st Qu.: 11.00 1st Qu.: 1.70 1st Qu.: 0.00000 1st Qu.: 0.000
## Median : 35.70 Median : 5.30 Median : 0.00000 Median : 0.300
## Mean : 67.65 Mean : 10.16 Mean : 0.01228 Mean : 1.841
## 3rd Qu.: 88.70 3rd Qu.: 12.30 3rd Qu.: 0.00000 3rd Qu.: 1.300
## Max. :1474.70 Max. :411.70 Max. :81.30000 Max. :212.700
##
## MonthsInService UniqueSubs ActiveSubs ServiceArea
## Min. : 6.00 Min. : 1.000 Min. : 0.000 Length:51047
## 1st Qu.:11.00 1st Qu.: 1.000 1st Qu.: 1.000 Class :character
## Median :16.00 Median : 1.000 Median : 1.000 Mode :character
## Mean :18.76 Mean : 1.532 Mean : 1.354
## 3rd Qu.:24.00 3rd Qu.: 2.000 3rd Qu.: 2.000
## Max. :61.00 Max. :196.000 Max. :53.000
##
## Handsets HandsetModels CurrentEquipmentDays AgeHH1
## Min. : 1.000 Min. : 1.000 Min. : -5.0 Min. : 0.00
## 1st Qu.: 1.000 1st Qu.: 1.000 1st Qu.: 205.0 1st Qu.: 0.00
## Median : 1.000 Median : 1.000 Median : 329.0 Median :36.00
## Mean : 1.806 Mean : 1.559 Mean : 380.5 Mean :31.34
## 3rd Qu.: 2.000 3rd Qu.: 2.000 3rd Qu.: 515.0 3rd Qu.:48.00
## Max. :24.000 Max. :15.000 Max. :1812.0 Max. :99.00
## NA's :1 NA's :1 NA's :1 NA's :909
## AgeHH2 ChildrenInHH HandsetRefurbished HandsetWebCapable
## Min. : 0.00 Length:51047 Length:51047 Length:51047
## 1st Qu.: 0.00 Class :character Class :character Class :character
## Median : 0.00 Mode :character Mode :character Mode :character
## Mean :21.14
## 3rd Qu.:42.00
## Max. :99.00
## NA's :909
## TruckOwner RVOwner Homeownership BuysViaMailOrder
## Length:51047 Length:51047 Length:51047 Length:51047
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## RespondsToMailOffers OptOutMailings NonUSTravel OwnsComputer
## Length:51047 Length:51047 Length:51047 Length:51047
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## HasCreditCard RetentionCalls RetentionOffersAccepted NewCellphoneUser
## Length:51047 Min. :0.0000 Min. :0.00000 Length:51047
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 Class :character
## Mode :character Median :0.0000 Median :0.00000 Mode :character
## Mean :0.0372 Mean :0.01828
## 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :4.0000 Max. :3.00000
##
## NotNewCellphoneUser ReferralsMadeBySubscriber IncomeGroup
## Length:51047 Min. : 0.00000 Min. :0.000
## Class :character 1st Qu.: 0.00000 1st Qu.:0.000
## Mode :character Median : 0.00000 Median :5.000
## Mean : 0.05207 Mean :4.325
## 3rd Qu.: 0.00000 3rd Qu.:7.000
## Max. :35.00000 Max. :9.000
##
## OwnsMotorcycle AdjustmentsToCreditRating HandsetPrice
## Length:51047 Min. : 0.00000 Length:51047
## Class :character 1st Qu.: 0.00000 Class :character
## Mode :character Median : 0.00000 Mode :character
## Mean : 0.05391
## 3rd Qu.: 0.00000
## Max. :25.00000
##
## MadeCallToRetentionTeam CreditRating PrizmCode
## Length:51047 Length:51047 Length:51047
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## Occupation MaritalStatus
## Other :37637 Length:51047
## Professional: 8755 Class :character
## Crafts : 1519 Mode :character
## Clerical : 986
## Self : 879
## Retired : 733
## (Other) : 538
# Split customers by churn outcome. which() drops any NA comparison results,
# matching the row selection that subset() performs.
Churned <- Telecom_Data[which(Telecom_Data$Churn == "Yes"), ]
Retained <- Telecom_Data[which(Telecom_Data$Churn == "No"), ]
# Histogram of tenure (months in service) for customers who churned.
library(ggplot2)
# The bar fill and outline are constants set on the layer, so no fill
# aesthetic is mapped: a constant fill on the layer overrides a mapped one,
# which made the former `fill = Churn` mapping dead code.
# The bin count is intentionally left at the stat_bin() default.
ggplot(Churned, aes(x = MonthsInService)) +
  geom_histogram(
    position = "identity",
    alpha = 0.6,
    color = "aquamarine4",
    fill = "aquamarine3"
  ) +
  xlab("Service period for churned customers (In Months) ") +
  ylab("Frequency") +
  theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#+ggtitle("Service Months Distribution for Churned customers")
# Side-by-side bar counts of churned vs. retained customers per Prizm code.
ggplot(data = Telecom_Data, mapping = aes(x = PrizmCode, fill = Churn)) +
  geom_bar(position = "dodge2") +
  labs(title = "Churn distribution for Prizm code")
#install.packages("plotly")
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Slice colours for the Prizm-code pie charts.
colors <- c(
  "rgb(211,94,96)", "rgb(128,133,133)", "rgb(144,103,167)",
  "rgb(171,104,87)", "rgb(114,147,203)"
)

# Build a pie chart of customer counts per Prizm code.
#
# The data has no `n` column, so the former `values = <data>$n` evaluated to
# NULL and the chart presumably relied on plotly counting repeated labels.
# Counting with table() makes the slice sizes explicit and supplies exactly
# one label per slice, so the palette is paired with the Prizm codes in the
# same (sorted) order for every chart built by this helper.
#
# customers: data frame with a `PrizmCode` column.
# palette:   character vector of slice colours; defaults to `colors`.
# Returns the plotly figure object.
prizm_pie <- function(customers, palette = colors) {
  prizm_counts <- table(customers$PrizmCode)
  plot_ly(
    type = "pie",
    labels = names(prizm_counts),
    values = as.vector(prizm_counts),
    textinfo = "label+percent",
    insidetextorientation = "radial",
    marker = list(
      colors = palette,
      line = list(color = "#FFFFFF", width = 1)
    )
  )
}

# Prizm-code share among churned customers.
fig <- prizm_pie(Churned)
fig
# Prizm-code share among retained customers.
fig_1 <- prizm_pie(Retained)
fig_1
# Bar chart of how many customers fall into each occupation category.
ggplot(data = Telecom_Data, mapping = aes(x = Occupation)) +
  geom_bar(fill = "bisque") +
  labs(title = "Frequency distribution of occupation")
# Cross-tabulate occupation (rows) against churn status (columns).
Occupation_Churn <- table(
  Telecom_Data[["Occupation"]],
  Telecom_Data[["Churn"]]
)
str(Occupation_Churn)
## 'table' int [1:8, 1:2] 697 1093 106 26705 6288 548 636 263 289 426 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:8] "Clerical" "Crafts" "Homemaker" "Other" ...
## ..$ : chr [1:2] "No" "Yes"
# Pearson chi-squared test of independence between occupation and churn,
# run on the occupation-by-churn contingency table.
chisq_test <- chisq.test(x = Occupation_Churn)
chisq_test
##
## Pearson's Chi-squared test
##
## data: Occupation_Churn
## X-squared = 10.316, df = 7, p-value = 0.1714
# Pull the p-value out of the test result and display it.
p_value <- chisq_test[["p.value"]]
p_value
## [1] 0.1713543
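The p-value (0.171) is greater than 0.05, so we fail to reject the null hypothesis of independence: the data show no statistically significant association between occupation and churn.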
# Analyzing dropped calls and blocked calls effect on Churn:
# normal QQ plots of each variable, separately for churned and retained
# customers.

# Draw a normal QQ plot of `values` with a green reference line.
#
# qqnorm() places the theoretical quantiles on the x-axis and the sample
# quantiles on the y-axis, so the variable name is used as the y-axis label
# (it was previously passed as `xlab`, mislabelling the theoretical axis).
#
# values:    numeric vector to plot.
# variable:  variable name shown on the sample (y) axis.
# point_col: colour of the plotted points.
# title:     main title of the plot.
qq_with_line <- function(values, variable, point_col, title) {
  qqnorm(values, ylab = variable, col = point_col, main = title)
  qqline(values, col = "green")
}

qq_with_line(
  Churned$DroppedCalls, "DroppedCalls", "blue",
  "A QQ Plot of dropped calls for churned customers"
)
qq_with_line(
  Retained$DroppedCalls, "DroppedCalls", "blue",
  "A QQ Plot of dropped calls for retained customers"
)
qq_with_line(
  Churned$BlockedCalls, "BlockedCalls", "orange",
  "A QQ Plot of blocked calls for churned customers"
)
qq_with_line(
  Retained$BlockedCalls, "BlockedCalls", "orange",
  "A QQ Plot of blocked calls for retained customers"
)